{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5293efa6-924f-4594-805a-d8fd6b23db32",
   "metadata": {},
   "outputs": [],
   "source": [
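    "# One-time setup: uncomment the last line to install the Delta Lake Python package\n",
    "# into this kernel's environment. Pin a release that matches the cluster's Spark\n",
    "# version (Delta 3.1.x targets Spark 3.5.x); adjust the pin if the cluster differs.\n",
    "# %pip install delta-spark==3.1.0"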
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b8a32498-6b93-4f87-9e42-c57aeee53c34",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "        <div>\n",
       "            <p><b>SparkContext</b></p>\n",
       "\n",
       "            <p><a href=\"http://04b9f929d420:4040\">Spark UI</a></p>\n",
       "\n",
       "            <dl>\n",
       "              <dt>Version</dt>\n",
       "                <dd><code>v3.5.0</code></dd>\n",
       "              <dt>Master</dt>\n",
       "                <dd><code>spark://spark-master:7077</code></dd>\n",
       "              <dt>AppName</dt>\n",
       "                <dd><code>MyApp</code></dd>\n",
       "            </dl>\n",
       "        </div>\n",
       "        "
      ],
      "text/plain": [
       "<SparkContext master=spark://spark-master:7077 appName=MyApp>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pyspark\n",
    "# Explicit imports instead of `from delta import *`: these are the two public\n",
    "# names the delta package exports, so the notebook namespace is unchanged but\n",
    "# every name's origin is visible.\n",
    "from delta import DeltaTable, configure_spark_with_delta_pip\n",
    "\n",
    "# Session builder for the standalone cluster, with Delta Lake's SQL extension\n",
    "# and catalog implementation registered.\n",
    "builder = (\n",
    "    pyspark.sql.SparkSession.builder\n",
    "    .master(\"spark://spark-master:7077\")\n",
    "    .appName(\"MyApp\")\n",
    "    .config(\"spark.sql.extensions\", \"io.delta.sql.DeltaSparkSessionExtension\")\n",
    "    .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n",
    ")\n",
    "\n",
    "# configure_spark_with_delta_pip extends the builder so the session pulls in the\n",
    "# Delta Lake JARs that correspond to the pip-installed delta-spark package.\n",
    "spark = configure_spark_with_delta_pip(builder).getOrCreate()\n",
    "\n",
    "# Display the SparkContext summary (UI link, Spark version, master, app name).\n",
    "sc = spark.sparkContext\n",
    "sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "846a621d-43ef-4fcb-9e3f-0dd3e710665c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Build a single-column DataFrame with ids 0..4 and store it as a Delta table.\n",
    "# \"overwrite\" mode replaces whatever is already stored at the path.\n",
    "data = spark.range(start=0, end=5)\n",
    "(\n",
    "    data.write\n",
    "    .format(\"delta\")\n",
    "    .mode(\"overwrite\")\n",
    "    .save(\"/data/delta/delta-table-qs\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "f368d4a1-6be2-40f2-9aff-8824654c2e4f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+\n",
      "| id|\n",
      "+---+\n",
      "|  2|\n",
      "|  3|\n",
      "|  4|\n",
      "|  0|\n",
      "|  1|\n",
      "+---+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Load the Delta table back from its storage path and print its rows.\n",
    "df = (\n",
    "    spark.read\n",
    "    .format(\"delta\")\n",
    "    .load(\"/data/delta/delta-table-qs\")\n",
    ")\n",
    "df.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a9ba36df-56f9-4722-8300-1807348a2234",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Replace the table's contents with ids 5..9; the overwrite is written to the\n",
    "# same path as before.\n",
    "data = spark.range(start=5, end=10)\n",
    "(\n",
    "    data.write\n",
    "    .format(\"delta\")\n",
    "    .mode(\"overwrite\")\n",
    "    .save(\"/data/delta/delta-table-qs\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "fdb6ef33-df6a-45ac-9b63-158f20e72d2c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------+-----------------------+---------+--------------------------------------+\n",
      "|version|timestamp              |operation|operationParameters                   |\n",
      "+-------+-----------------------+---------+--------------------------------------+\n",
      "|17     |2024-03-25 22:45:22.009|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|16     |2024-03-25 22:45:12.058|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|15     |2024-03-25 22:34:50.564|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|14     |2024-03-25 22:34:41.697|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|13     |2024-03-25 01:25:36.076|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|12     |2024-03-25 01:25:27.895|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|11     |2024-03-25 01:22:15.792|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|10     |2024-03-25 01:22:09.105|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|9      |2024-03-25 01:09:40.499|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "|8      |2024-03-25 01:09:36.541|WRITE    |{mode -> Overwrite, partitionBy -> []}|\n",
      "+-------+-----------------------+---------+--------------------------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from delta.tables import DeltaTable\n",
    "\n",
    "# Open the existing Delta table by its storage path.\n",
    "delta_table = DeltaTable.forPath(spark, \"/data/delta/delta-table-qs\")\n",
    "\n",
    "# Print up to 10 rows of the table's commit history, with wide columns such as\n",
    "# operationParameters shown in full rather than truncated.\n",
    "(\n",
    "    delta_table.history()\n",
    "    .select(\"version\", \"timestamp\", \"operation\", \"operationParameters\")\n",
    "    .show(n=10, truncate=False)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "17c5ee49-8ab1-4aef-b9c7-4cdfb7717e90",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shut down through the session rather than the bare SparkContext:\n",
    "# SparkSession.stop() stops the underlying SparkContext and also clears the\n",
    "# session's registered singletons, so a later getOrCreate() starts clean.\n",
    "spark.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb4738eb-3bb9-464a-a526-f6cce824a97c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
